*Aggregating emissions of pollutants*

*Start from an empty session so that nothing left in memory interferes with the steps below
clear all

*Disabled: enable and adjust this path so that the relative dataset names used below resolve to the replication folder
*cd "/RFS_replication_package/Martinsson_et_al_datasets"

*********
*Please note that the final establishment-level dataset generated by this do-file has already been saved as the Stata (.dta) file "installation_firm_emissions_2.dta". All intermediate variables have been removed from it.
*********



/* (Delete this line and the matching comment closer near the end of the file to run the original code used in the paper)





*Load the establishment-level emission data provided by Statistics Sweden
use "utslappsdata_scb_cfarnr.dta"

sort bidnr year, stable

*Recode system-missing emission values to 0 before the values are aggregated
foreach pollutant in kt_CO2foss kt_CO2bio t_SO2 t_NOx t_CH4 t_N2O t_CO t_NMVOC t_TSP t_PM10 t_PM25 t_NH3 tCO2eq_HFC tCO2eq_PFC tCO2eq_SF6 {
	replace `pollutant'=0 if `pollutant'==.
}


*Sum each pollutant over all rows that share the same bidnr and year.
*egen stores the group total on every row of the group; the rows themselves are not collapsed here.
*The by prefix relies on the data already being sorted by bidnr and year.
*Rows with a missing bidnr are excluded by the if qualifier, so their totals stay missing.
*NOTE(review): the second condition of each if qualifier looks redundant once the missing values of these variables have been recoded to 0 - confirm.
*Disabled: sum of process emissions
*by bidnr year: egen sum_co2pr=sum(pr_co2_kt)
by bidnr year: egen sum_co2foss_new=sum(kt_CO2foss) if bidnr!=. & kt_CO2foss!=.
by bidnr year: egen sum_co2bio=sum(kt_CO2bio) if bidnr!=. & kt_CO2bio!=.
by bidnr year: egen sum_so2=sum(t_SO2) if bidnr!=. & t_SO2!=.
by bidnr year: egen sum_nox=sum(t_NOx) if bidnr!=. & t_NOx!=.
by bidnr year: egen sum_ch4=sum(t_CH4) if bidnr!=. & t_CH4!=.
by bidnr year: egen sum_n2o=sum(t_N2O) if bidnr!=. & t_N2O!=.
by bidnr year: egen sum_co=sum(t_CO) if bidnr!=. & t_CO!=.
by bidnr year: egen sum_nmvoc=sum(t_NMVOC) if bidnr!=. & t_NMVOC!=.
by bidnr year: egen sum_tsp=sum(t_TSP) if bidnr!=. & t_TSP!=.
by bidnr year: egen sum_pm10=sum(t_PM10) if bidnr!=. & t_PM10!=.
by bidnr year: egen sum_pm25=sum(t_PM25) if bidnr!=. & t_PM25!=.
by bidnr year: egen sum_nh3=sum(t_NH3) if bidnr!=. & t_NH3!=.
by bidnr year: egen sum_hfc=sum(tCO2eq_HFC) if bidnr!=. & tCO2eq_HFC!=.
by bidnr year: egen sum_pfc=sum(tCO2eq_PFC) if bidnr!=. & tCO2eq_PFC!=.
by bidnr year: egen sum_sf6=sum(tCO2eq_SF6) if bidnr!=. & tCO2eq_SF6!=.


*Merge with the firm-level fossil heating emissions
merge m:1 bidnr year using emissions_basic.dta /*Fossil heating emissions on the firm-level, provided by Statistics Sweden*/

drop _merge

*We generate the fossil-emission variable based on the firm-level data (that is, emissions_basic.dta) and the installation-level data. We give priority to the firm-level observations, so we use the sum of the installation-level data when the firm-level alternative is zero.
*NOTE(review): when kt_co2foss is missing rather than 0 (for example a row without a match in emissions_basic.dta), the condition kt_co2foss!=0 is true, so the final value becomes missing and is set to 0 further down instead of falling back to the installation-level sum - confirm that this is intended.
g kt_co2foss_final=0
replace kt_co2foss_final=kt_co2foss if kt_co2foss!=0 & sum_co2foss_new==0
replace kt_co2foss_final=kt_co2foss if kt_co2foss!=0 & sum_co2foss_new!=0
replace kt_co2foss_final=sum_co2foss_new if kt_co2foss==0 & sum_co2foss_new!=0 


merge m:1 bidnr year using eu_ets_sweden.dta /*EU ETS emission data for Swedish establishments, provided by Statistics Sweden*/

*Keep only the rows found in both datasets
keep if _merge==3
*Drop the merge indicator: a later merge creates _merge again and stops with an error if the variable still exists
drop _merge

replace ets_emission=ets_emission/1000


*Rows with a missing bidnr can remain from the earlier merges; remove them*
drop if bidnr==.
replace pr_co2_kt=0 if pr_co2_kt==.  /*process emissions*/
replace kt_co2foss_final=0 if kt_co2foss_final==.
replace ets_emission=0 if ets_emission==.
*Total CO2 is the sum of fossil and process emissions
g total_Co2=kt_co2foss_final+pr_co2_kt 
replace total_Co2=0 if total_Co2==.

*Recode system-missing totals to 0 (NMVOC, PM2.5, PM10 etc. are further substances that damage health)
foreach agg in sum_co2foss_new sum_co2bio sum_so2 sum_nox sum_ch4 sum_n2o sum_co sum_nmvoc sum_tsp sum_pm10 sum_pm25 sum_nh3 sum_hfc sum_pfc sum_sf6 {
	replace `agg'=0 if `agg'==.
}




*Non-CO2 emissions covered by the EU ETS, by trading phase*
*2005-2007 (trial phase): only CO2 was covered
g total_ets_noCO2=0 if inrange(year,2005,2007)
*2008-2012 (second phase): nitrous oxide was covered; it is converted to CO2 equivalents based on its global warming potential
replace total_ets_noCO2=sum_n2o*0.298 if inrange(year,2008,2012)
*From 2013: the PFC total is added as well (presumably rescaled from tonnes to kilotonnes - confirm units)
replace total_ets_noCO2=sum_n2o*0.298+sum_pfc/1000 if year>=2013

*Fossil CO2 emissions under the EU ETS: verified emissions less process emissions and non-CO2 gases
replace pr_co2_kt=0 if pr_co2_kt==.
g kt_co2_fossil_ets=ets_emission-pr_co2_kt-total_ets_noCO2

*Carbon emissions subject to the carbon tax (not subject to the EU ETS)*
*All fossil emissions that do not belong to the EU ETS are subject to the carbon tax.
*If the inferred EU ETS fossil emissions exceed the heating emissions provided by SCB, all fossil emissions are taken to be under the EU ETS and the taxed amount is 0.
g carbon_tax_co2=cond(kt_co2foss-kt_co2_fossil_ets<0,0,kt_co2foss-kt_co2_fossil_ets)

*Merge with the yearly tax rates to infer the carbon tax payment
*NOTE(review): merge creates the variable _merge and fails if a _merge left over from an earlier merge is still in the data - confirm it has been dropped before this point
merge m:1 year using carbon_tax_rates_new.dta

g carbon_tax=carbon_tax_co2*Tax_rate

*Drop duplicates: the totals above were stored on every installation row, so only the first row of each bidnr-year group is kept*
sort bidnr year, stable
*dup is 0 for groups with a single row, otherwise the position of the row within its group
quietly by bidnr year: g dup=cond(_N==1,0,_n)
drop if dup>1
drop dup

*Remove the merge indicator left by the merge with the carbon tax rates
drop _merge

merge 1:1 bidnr year using arbetstallen_SNI_codes.dta  /*merging with NACE codes of the establishments*/
*Discard rows that exist only in the SNI code file
drop if _merge==2
drop _merge

g SNI=substr(sni2007_1,1,4) /*first four characters of the SNI code*/

bysort  year SNI: egen est_proc=mean(pr_co2_kt) /*estimated process emissions: mean within year and 4-digit SNI group*/

*Disabled: difference between inferred EU ETS fossil emissions and reported fossil emissions
*g eu_carbon_differene=kt_co2_fossil_ets-kt_co2foss

*NOTE(review): only the four variables listed below are kept, so SNI, est_proc, the carbon tax variables and the pollutant totals created above are not part of the saved file - confirm that this is intended
keep bidnr year total_ets_noCO2 ets_emission

drop if bidnr==.

*Keep the unadjusted EU ETS emissions under a new name
rename ets_emission ets_emission2

g ets_emission=ets_emission2-total_ets_noCO2 /*Estimated EU ETS CO2 emissions*/
*Negative estimates are set to 0
replace ets_emission=0 if ets_emission<0

save installation_firm_emissions_2.dta, replace

**Aggregate the allocated allowances per bidnr and year, based on EUTL data**
clear all
use "EU_ETS_installation_Sweden.dta" 

*Missing allocations count as 0
replace AllowancesinAllocation=0 if AllowancesinAllocation==.

sort bidnr year, stable
by bidnr year: egen sum_allowance=sum(AllowancesinAllocation) if bidnr!=.

*Keep only the first row of every bidnr-year group
by bidnr year: drop if _n>1
keep bidnr year sum_allowance ComplianceCode

save eu_ets_allowances.dta, replace

***Match the allowances with the installation-firm emissions***

clear all

*NOTE(review): this loads installation_firm_emissions.dta, whereas the emissions step earlier in this file saves installation_firm_emissions_2.dta - confirm which file is intended
use "installation_firm_emissions.dta" 

merge 1:1 bidnr year using eu_ets_allowances.dta 

*Keep only the bidnr-year pairs present in both datasets
keep if _merge==3
drop _merge
replace sum_allowance=sum_allowance/1000

*Replace zero allowances with estimates: the allowance allocations were set prior to the trading phases and contracted only slowly, so the previous non-zero value of the same bidnr is used.
*Row counter; kept because the original code stored it in the saved dataset
g id=_n

*replace works through the observations in order, so a value carried into one row is already available when the next row is evaluated. This reproduces the former row-by-row loop in a single pass.
*The condition on _n protects the first observation, which has no predecessor.
quietly replace sum_allowance=sum_allowance[_n-1] if _n>1 & sum_allowance[_n-1]!=0 & sum_allowance==0 & bidnr==bidnr[_n-1]

save installation_firm_emissions_2.dta, replace
*/


*Load the final dataset. clear is the option of use that discards the data in memory; replace is not an option of use and makes the command fail.
use installation_firm_emissions_2.dta, clear